package com.kdtech.analyse.video;
import com.kdtech.analyse.AnalyseNews;
import com.kdtech.analyse.JSoupUtils;
import com.kdtech.crawler.CrawlHTML;
import com.kdtech.entity.crawler.UrlMeta;
import com.kdtech.entity.data.NewsMeta;
import com.kdtech.utils.DateUtils;
import com.kdtech.utils.StringUtils;

import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Parser for Youku (http://www.youku.com/) video detail pages.
 *
 * <p>Extracts title, description, publish date and author from the page HTML
 * and, when a video id can be located in the page script, fetches the
 * click/comment counters from a secondary "update" URL.
 *
 * @author allen
 */
public class YouKuAnalyse implements AnalyseNews {

	/**
	 * Shape of a video detail-page URL. Dots are escaped so that only the
	 * literal host {@code v.youku.com} and a literal {@code .html} suffix match.
	 */
	private static final Pattern DETAIL_PAGE_URL =
			Pattern.compile("https?://v\\.youku\\.com/v_show/.*\\.html.*");

	/** Script fragment that precedes the video id in the page source. */
	private static final String VIDEO_ID_MARKER = "var videoId =";

	/** Script fragment scanned for a date when the visible timestamp is missing. */
	private static final String STAGE_MARKER = "var stage=\"";

	/** Text that starts the category suffix appended to some descriptions. */
	private static final String CATEGORY_MARKER = "分类: ";

	/**
	 * Tells whether {@code url} looks like a Youku video detail page.
	 *
	 * @param url the URL to test; may be {@code null}
	 * @return {@code true} if the whole URL matches the detail-page pattern,
	 *         {@code false} otherwise (including for {@code null})
	 */
	public boolean isDetailPage(String url) {
		return url != null && DETAIL_PAGE_URL.matcher(url).matches();
	}

	/**
	 * Parses a fetched page into a {@link NewsMeta}.
	 *
	 * <p>Side effect: when a video id is found in the page script, this method
	 * calls {@link #Update(NewsMeta)}, which requests the update URL through
	 * {@code CrawlHTML.responseToURL}.
	 *
	 * @param urlMeta the fetched page (URL plus HTML)
	 * @return the populated metadata, or {@code null} when {@code urlMeta} or
	 *         its HTML is {@code null}
	 */
	public NewsMeta parserHtml(UrlMeta urlMeta) {
		if (urlMeta == null) {
			return null;
		}
		String htmltxt = urlMeta.getHtml();
		if (htmltxt == null) {
			// Nothing to parse; Jsoup.parse would fail on a null document.
			return null;
		}

		NewsMeta video = new NewsMeta();
		video.setUrl(urlMeta.getUrl());

		Document doc = Jsoup.parse(htmltxt);

		// Selectors run from the most specific page layout to generic fallbacks.
		String title = firstNonBlankText(doc,
				"div.s_main > div#vpvideotitlev5_wrap > div#vpvideotitlev5 > div.base > div.base_info > h1.title > span#subtitle",
				"div.s_main > div#vpvideotitle_wrap > div#vpvideotitle > div.base > div.base_info > h1.title > span#subtitle",
				"div.s_main > div#vpofficialtitle_wrap > div#vpofficialtitle > div.base > div.show_intro > h1.title > span#subtitle.subtitle",
				"span#subtitle",
				"h1.title");

		String desc = firstNonBlankText(doc,
				"div#text_short",
				"div#long > div.item",
				"span#show_info_short",
				"div#text_long");
		int categoryStart = desc.indexOf(CATEGORY_MARKER);
		if (categoryStart != -1) {
			// Drop the trailing category section from the description.
			desc = desc.substring(0, categoryStart);
		}

		Long date = extractDate(doc, htmltxt);

		// Resolve the URL used to refresh the counters and load them once.
		attachStats(video, htmltxt);

		String author = firstNonBlankText(doc,
				"html body.page_ugc div.window div.screen div.s_body div.s_main div.mainCol div#vpvideoinfov5_wrap div#vpvideoinfov5 div.paikeArea div.commentcon div.userInfo div.bar a.userName",
				"html body.page_v div.window div.screen div.s_body div.s_main div.mainCol div#vpvideoinfov5_wrap div#vpvideoinfov5 div.paikeArea div.commentcon div.userInfo div.bar a.userName",
				"html body.page_list div.window div.screen div.s_body div.s_main div.mainCol div#vpvideoinfov5_wrap div#vpvideoinfov5 div.paikeArea div.commentcon div.userInfo div.bar a.userName",
				"a.userName");

		// NOTE(review): presumably this filters the site's placeholder
		// description; the exact matching rule depends on the project's
		// StringUtils.containsAny - confirm it is a substring test.
		if (desc != null && StringUtils.containsAny(desc, "稍后补充视频简介")) {
			desc = null;
		}

		video.setTitle(StringUtils.trimSpace(title));
		video.setContent(desc);
		video.setDate(date);
		video.setAuthor(author);
		return video;
	}

	/**
	 * Refreshes the click and comment counters of {@code meta} by fetching its
	 * update URL through {@code CrawlHTML.responseToURL}.
	 *
	 * <p>A counter that is missing or not numeric is stored as {@code 0}.
	 *
	 * @param meta the metadata to refresh; modified in place
	 * @return {@code meta} with both counters set, or {@code null} when
	 *         {@code meta}, its update URL, the response or the response HTML
	 *         is {@code null}
	 */
	public NewsMeta Update(NewsMeta meta) {
		if (meta == null) {
			return null;
		}
		String updateUrl = meta.getUpdateUrl();
		if (updateUrl == null) {
			return null;
		}
		UrlMeta response = CrawlHTML.responseToURL(updateUrl);
		if (response == null) {
			return null;
		}
		String html = response.getHtml();
		if (html == null) {
			return null;
		}

		Document doc = Jsoup.parse(html);

		// eq(0) yields an empty selection (and therefore "") when nothing
		// matches, so no exception handling is needed for a missing element.
		meta.setClickNum(parseCount(doc.select("span.num").eq(0).text()));
		meta.setCommentNum(parseCount(doc.select("span#totalComment2").text()));
		return meta;
	}

	/**
	 * Manual smoke test: fetches one sample video page and prints the result.
	 *
	 * @param args unused
	 * @throws Exception if fetching or parsing fails unexpectedly
	 */
	public static void main(String[] args) throws Exception {
		YouKuAnalyse a = new YouKuAnalyse();
		String url = "http://v.youku.com/v_show/id_XMTI3MzE5NTUzMg%3D%3D.html";
		UrlMeta meta = CrawlHTML.responseToURL(url);
		NewsMeta parserHtml = a.parserHtml(meta);
		System.out.println(parserHtml);
	}

	/**
	 * Returns the text of the first selector that yields non-blank text.
	 *
	 * @param doc the parsed page
	 * @param selectors CSS selectors, tried in order
	 * @return the first non-blank text; if every selector is blank, the text
	 *         of the last selector tried (or {@code ""} when none is given)
	 */
	private static String firstNonBlankText(Document doc, String... selectors) {
		String text = "";
		for (String selector : selectors) {
			text = doc.select(selector).text();
			if (!StringUtils.isBlank(text)) {
				return text;
			}
		}
		return text;
	}

	/**
	 * Determines the publish date: first from the visible timestamp elements,
	 * then from the player markup or the {@code var stage="} script fragment,
	 * and finally through {@code JSoupUtils.matchDate} keyword matching.
	 *
	 * @param doc the parsed page
	 * @param htmltxt the raw page source
	 * @return the matched date, or {@code null} when no strategy yields one
	 */
	private static Long extractDate(Document doc, String htmltxt) {
		String dateText = firstNonBlankText(doc,
				"div.s_main > div.mainCol > div#vpvideoinfov5_wrap > div#vpvideoinfov5 > div.paikeArea > div.commentcon > div.con > div.panel > span.timestamp",
				"div.s_main > div.right > div.infoArea > div#vpvideoinfo_wrap > div#vpvideoinfo > div.vInfo > div#vpvideoinfoBox.box > div.body > div.offical > div.pub > span.pub");
		dateText = dateText.replace("发表于", "");
		Long date = DateUtils.matchDate(dateText);

		if (date == null) {
			dateText = doc.select("div#player").html();
			int stageStart = htmltxt.indexOf(STAGE_MARKER);
			if (stageStart != -1) {
				// Prefer the script statement, cut at its terminating ';'.
				dateText = htmltxt.substring(stageStart);
				int statementEnd = dateText.indexOf(';');
				if (statementEnd != -1) {
					dateText = dateText.substring(0, statementEnd);
				}
			}
			date = DateUtils.matchDate(dateText);
		}

		if (date == null) {
			date = JSoupUtils.matchDate(doc, "上传","发表于");
		}
		return date;
	}

	/**
	 * Locates the video id in the page script, stores the derived update URL
	 * on {@code video} and copies the counters returned by
	 * {@link #Update(NewsMeta)}. Does nothing when no usable id is found.
	 *
	 * @param video the metadata being built
	 * @param htmltxt the raw page source
	 */
	private void attachStats(NewsMeta video, String htmltxt) {
		int markerStart = htmltxt.indexOf(VIDEO_ID_MARKER);
		if (markerStart == -1) {
			return;
		}
		String videoId = htmltxt.substring(markerStart + VIDEO_ID_MARKER.length());
		int statementEnd = videoId.indexOf(';');
		if (statementEnd == -1) {
			return;
		}
		videoId = videoId.substring(0, statementEnd)
				.replace("'", "")
				.replace(" ", "")
				.trim();
		if (videoId.length() == 0) {
			// A blank id would only produce a malformed update URL.
			return;
		}

		String updateUrl = "http://v.youku.com/v_vpactionInfo/id/" + videoId + "/pm/2/f/1?__rt=1&__ro=info_stat";
		video.setUpdateUrl(updateUrl);
		NewsMeta update = Update(video);
		if (update != null) {
			video.setCommentNum(update.getCommentNum());
			video.setClickNum(update.getClickNum());
		}
	}

	/**
	 * Parses a counter such as {@code "1,234"}.
	 *
	 * @param raw the counter text as scraped from the page
	 * @return the parsed value, or {@code 0} when the text is empty or is not
	 *         a valid integer after removing {@code ,} and {@code '}
	 */
	private static Integer parseCount(String raw) {
		String digits = raw.replace(",", "").replace("'", "").trim();
		if (digits.length() == 0) {
			return 0;
		}
		try {
			return Integer.parseInt(digits);
		} catch (NumberFormatException ignored) {
			// Best effort: an unparseable counter is reported as zero.
			return 0;
		}
	}
}
